##### Installation and loading of required packages #####
# Bootstrap pacman, which is used below to install and attach all other
# packages. requireNamespace() only checks whether pacman is available, so
# the installation runs just when it is missing. library() then attaches it
# and, unlike require(), raises an error if the package still cannot be
# loaded instead of silently returning FALSE.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)
Lade nötiges Paket: pacman
# Install (when missing) and attach every package used by this script.
# The package names are handed over as a character vector through `char`.
pacman::p_load(
  char = c(
    "here",
    "ggplot2",
    "readr",
    "dplyr",
    "timetk",
    "hrbrthemes",
    "h2o",
    "neptune"
  )
)
# Load data/RKI_COVID19.csv; here::here() builds the path to the file.
df <- readr::read_csv(file = here::here("data", "RKI_COVID19.csv"))
Parsed with column specification:
cols(
ObjectId = col_double(),
IdBundesland = col_double(),
Bundesland = col_character(),
Landkreis = col_character(),
Altersgruppe = col_character(),
Geschlecht = col_character(),
AnzahlFall = col_double(),
AnzahlTodesfall = col_double(),
Meldedatum = col_character(),
IdLandkreis = col_character(),
Datenstand = col_character(),
NeuerFall = col_double(),
NeuerTodesfall = col_double(),
Refdatum = col_character(),
NeuGenesen = col_double(),
AnzahlGenesen = col_double(),
IstErkrankungsbeginn = col_double(),
Altersgruppe2 = col_character()
)
# Meldedatum is read as character (see the column specification), so convert
# it to Date before grouping on it.
df[["Meldedatum"]] <- as.Date(df[["Meldedatum"]])

# Restrict the data to a single district.
filterLandkreis <- "SK Münster"
df_m <- filter(df, Landkreis == filterLandkreis)

# Total AnzahlFall per Meldedatum.
# NOTE(review): all rows are summed regardless of their NeuerFall value;
# confirm that this is intended.
df_agg <- summarize(
  group_by(df_m, Meldedatum),
  faelle = sum(AnzahlFall)
)
`summarise()` ungrouping output (override with `.groups` argument)
# Make hrbrthemes' theme_ft_rc() the active ggplot2 theme.
myTheme <- theme_ft_rc()
theme_set(myTheme)

# Show timetk's anomaly diagnostics for the daily totals.
timetk::plot_anomaly_diagnostics(
  .data = df_agg,
  .date_var = Meldedatum,
  .value = faelle
)
frequency = 6 observations per 1 week
trend = 75 observations per 3 months
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
# Augment the daily totals with rolling means of `faelle` for periods 2, 6
# and 30 (partial windows allowed). The added columns are referenced further
# down as faelle_roll_2, faelle_roll_6 and faelle_roll_30.
ts <- tk_augment_slidify(
  df_agg,
  .value = faelle,
  .period = c(2, 6, 30),
  .f = mean,
  .partial = TRUE
)
New names:
* NA -> ...1
* NA -> ...2
* NA -> ...3
# Connect to H2O using the default settings; if no instance is running yet,
# one is started (see the start-up log below).
h2o::h2o.init()
H2O is not running yet, starting it now...
Note: In case of errors look at the following log files:
C:\Users\kof\AppData\Local\Temp\RtmpOOO9qu\file445043515609/h2o_KoF_started_from_r.out
C:\Users\kof\AppData\Local\Temp\RtmpOOO9qu\file44504dd84bfe/h2o_KoF_started_from_r.err
java version "11.0.2" 2019-01-15 LTS
Java(TM) SE Runtime Environment 18.9 (build 11.0.2+9-LTS)
Java HotSpot(TM) 64-Bit Server VM 18.9 (build 11.0.2+9-LTS, mixed mode)
Starting H2O JVM and connecting: . Connection successful!
R is connected to the H2O cluster:
H2O cluster uptime: 9 seconds 82 milliseconds
H2O cluster timezone: Europe/Berlin
H2O data parsing timezone: UTC
H2O cluster version: 3.32.0.1
H2O cluster version age: 2 months and 27 days
H2O cluster name: H2O_started_from_R_KoF_hak113
H2O cluster total nodes: 1
H2O cluster total memory: 3.97 GB
H2O cluster total cores: 8
H2O cluster allowed cores: 8
H2O cluster healthy: TRUE
H2O Connection ip: localhost
H2O Connection port: 54321
H2O Connection proxy: NA
H2O Internal Security: FALSE
H2O API Extensions: Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4
R Version: R version 3.6.3 (2020-02-29)
# Upload the feature table to the H2O cluster so it can be used for training
# and scoring. Uses `<-` for assignment, consistent with the rest of the
# script.
df_h2o <- as.h2o(ts)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
|
| | 0%
|
|============================================================================================================================================| 100%
# Run H2O AutoML to predict `faelle` from the three rolling-mean columns,
# with 3-fold cross-validation and a 5 second runtime budget. Stacked
# ensembles are excluded from the search.
# NOTE(review): no `seed` is passed, so repeated runs may pick different
# leaders - confirm whether reproducibility matters here.
model <- h2o.automl(
  x = c("faelle_roll_2", "faelle_roll_6", "faelle_roll_30"),
  y = "faelle",
  training_frame = df_h2o,
  nfolds = 3,
  max_runtime_secs = 5,
  exclude_algos = "StackedEnsemble"
)
|
| | 0%
|
|===== | 4%
08:56:39.904: AutoML: XGBoost is not available; skipping it.
|
|============================================================================= | 55%
|
|====================================================================================================================================== | 96%
|
|============================================================================================================================================| 100%
# Score the same frame the model was trained on and attach the predictions
# as additional columns next to the rolling-mean features.
pred <- cbind(
  ts,
  as.data.frame(h2o.predict(object = model, newdata = df_h2o))
)
|
| | 0%
|
|============================================================================================================================================| 100%
# Connect to the neptune project. The API token is taken from the
# NEPTUNE_API_TOKEN environment variable rather than being hard-coded.
init_neptune(
  project_name = "frank.koehne/covid-drift",
  api_token = Sys.getenv("NEPTUNE_API_TOKEN")
)
WARNING: There is a new version of neptune-client 0.4.130 (installed: 0.4.129).
Project(frank.koehne/covid-drift)
# Open a new neptune experiment for this run, tagged with the feature set.
# NOTE(review): `tuneLength = 5` is logged as a parameter, but no tuneLength
# is used in this script; presumably it mirrors max_runtime_secs = 5 of the
# AutoML call - confirm.
create_experiment(
  name = "Rolling Avg",
  tags = c("experimental", "automl", "roll2", "roll6", "roll30"),
  params = list(tuneLength = 5, model = "automl")
)
https://ui.neptune.ai/frank.koehne/covid-drift/e/COV-4
Experiment(COV-4)
# Record the data version of the filtered rows. Datenstand is read as
# character, so max() compares the strings lexicographically.
set_property(
  property = "data-version",
  value = max(df_m[["Datenstand"]])
)

# Log the number of aggregated days and the cross-validated RMSE of the
# AutoML leader model.
log_metric("n", nrow(df_agg))
log_metric("X-Val-RMSE", h2o.rmse(model@leader, xval = TRUE))
Results can be seen publicly.